# IncumbentsAnalysis_09202015.R 
# This file replicates the analyses and figures in 'Incumbency Advantage and Family Success in Local Elections' that pertain to Barangay Captain Incumbency.


rm(list=ls())

library(stringr)
library(gtools)
library(plyr)
library(rdd)
library(rdrobust)
library(pscl)
library(stargazer)
library(xtable)
library(doBy)
library(dplyr)
library(multiwayvcov)
library(clusterSEs)
library(ggplot2)
library(grid)
library(gridExtra)
library(ggthemes)



# SET YOUR WD (THE DIRECTORY TO THE FOLDER R&P_09202015) HERE:
	setwd("~/Dropbox/R&P_09202015")

# Call Custom Functions
	source('functions/plot.RD.R')
	source('functions/rdpoints.R')
	source('functions/rdplot2.R')

# Load Incumbent-Level Data
incumbents <- read.csv("data/incumbents.csv")

# ----- SAMPLE One Candidate Per Barangay ----- #
by_psgc <- group_by(incumbents, PSGC)
set.seed(25001)
samp_inc <- sample_n(by_psgc, 1)
length(unique(samp_inc$PSGC))
length(unique(incumbents$PSGC))

# ----------------------------------- #
# ----- Non-parametric Analyses ----- #
# ----------------------------------- #


# --------------- DV 1: Probability of running in t+1 --------------------- #
RD1a <- RDestimate(formula= ran13 ~ MV, cutpoint=0, subset=samp_inc$term!=3, data=samp_inc, cluster=as.factor(samp_inc$province.10), model=TRUE, bw=NULL)
summary(RD1a)
plot(RD1a)

pdf("figures/RD1a.pdf", width=7.5, height=5)
plot.RD(RD1a, ciCol="black", midCol="red", alpha=.1, range=c(-0.4,0.4))
abline(v=0, col="black")
title(main="Candidate Entry in Next Election", ylab="Candidate Entry, Election t+1", xlab="Margin of Victory, Election t")
rdpoints(y=samp_inc$ran13, x=samp_inc$MV, data=samp_inc, subset=NULL, c=0, cex=.6, col.bin="black")
dev.off()



	# ----- OPTION: Conditionality ----- #
	# Make models conditional - change 0s to NAs if s/he did not run again.
	samp_inc$voteShare[samp_inc$ran13==0] <- NA
	samp_inc$win[samp_inc$win==0] <- NA
	# ---------------------------------- #



# ----------------------- DV 2: Vote Share in t+1 ------------------------- #
RD1b <- RDestimate(formula= voteShare ~ MV, cutpoint=0, subset=samp_inc$term!=3, data=samp_inc, cluster=as.factor(samp_inc$province.10), model=TRUE, bw=NULL)
summary(RD1b)
plot(RD1b)

pdf("figures/RD1b.pdf", width=7.5, height=5)
plot.RD(RD1b, ciCol="black", midCol="red", alpha=.1, range=c(-0.4,0.4))
abline(v=0, col="black")
title(main="Vote Share in Next Election", ylab="Vote Share, Election t+1", xlab="Margin of Victory, Election t")
rdpoints(y=samp_inc$voteShare, x=samp_inc$MV, data=samp_inc, subset=NULL, c=0, cex=.6, col.bin="black")
dev.off()



# ------------ DV 3: Probability of running and winning in t+1 ------------- #
RD1c <- RDestimate(formula= win ~ MV, cutpoint=0, subset=samp_inc$term!=3, data=samp_inc, cluster=as.factor(samp_inc$province.10), model=TRUE, bw=NULL)
summary(RD1c)
plot(RD1c)

pdf("figures/RD1c.pdf", width=7.5, height=5)
plot.RD(RD1c, ciCol="black", midCol="red", alpha=.1, range=c(-0.4,0.4))
abline(v=0, col="black")
title(main="Candidate Entry and Win in Next Election", ylab="Candidate Entry and Win, Election t+1", xlab="Margin of Victory, Election t")
rdpoints(y=samp_inc$win, x=samp_inc$MV, data=samp_inc, subset=NULL, c=0, cex=.6, col.bin="black")
dev.off()




# ---------------------------- #
# ----- CUSTOM RD TABLES ----- #
# ---------------------------- #
# Functions
xtRdd <- function(model, dv.label="DV:", lcol=TRUE){
	require(tables)
	ml <- lapply(model$model, function(x){summary(x)$coefficients})
	ml <- lapply(ml, function(x){x[2,]})
	ml <- lapply(ml, function(x){matrix(x, dimnames=list(c(),c(paste(dv.label))))})
	mall <- do.call("rbind",ml)
		mall <- data.frame(rep(c("Estimate","Std. Error","t value","Pr(>|t|)"),3), mall)
		mall <-  cbind(c("100% BW","","","","50% BW","","","","200% BW","","",""),mall)

		colnames(mall) <- NULL;rownames(mall) <- NULL

# Add Stars
		pvals <- unlist(mall[c(4,8,12),3])
		mall <- cbind(mall, sig=rep(NA,length(mall[,1]))) 
		mall[c(1,5,9),4] <- symnum(pvals, corr = FALSE, cutpoints = c(0, .001,.01, .05, .1, 1), symbols = c("***","**","*","."," "))


	
	# Add BW
		mall[c(3,7,11),3] <- (model$bw)
			mall[,2] <- as.character(mall[,2])
			mall[c(3,7,11),2] <- "Bandwidth"
	# Add N
		mall[c(4,8,12),3] <- model$obs
			mall[c(4,8,12),2] <- "Observations"
	# Round Numeric Column
		mall[,3] <- round(mall[,3],3)

	# Prettyfy SEs 
		mall[c(2,6,10),3]<- paste0("(",mall[c(2,6,10),3],")")

	# Column Names
		# colnames(mall) <- NULL
		# colnames(mall)[length(colnames(mall))] <- dv.label

	# Print with line breaks
	if(lcol==TRUE){
		mall <- mall # , align="l|lrrl", type="html")
	}
	else{mall <- mall[,-c(1:2)]} # , align="|rrl", type="html")
	
	return(mall)
}

a <- cbind(xtRdd(model=RD1a), xtRdd(model=RD1b, lcol=FALSE), xtRdd(model=RD1c, lcol=FALSE))
colnames(a) <- NULL
addtorow <- list()
	addtorow$pos <- list(-1,2,6,10)
	addtorow$command <- c("\\multicolumn{2}{c}{} & \\multicolumn{2}{c}{Running for Office} & \\multicolumn{2}{c}{Vote Share} & \\multicolumn{2}{c}{Winning} \\\\", "& & &  & & & & \\\\", "& & &  & & & & \\\\", "& & &  & & & & \\\\")

print.xtable(xtable(a, label="tab:rd1", digits=3, align="lll|rl|rl|rl", caption="Regression discontinuity results. Local linear regressions are performed to either side of the cutpoint using triangular kernels. Results at 100\\%, 50\\% and 200\\% of the Imbens-Kalyanaraman optimal bandwidths are displayed. Standard errors are clustered by province. * denotes significance at the 10\\%, ** at the 5\\% and, *** at the 1\\% level."), floating.environment="table*", digits = c(0, rep(3, ncol(a))), hline.after=c(0,4,8,12), include.rownames = FALSE, include.colnames = FALSE, type="latex", file="tables/RD1.tex", add.to.row=addtorow)




# ------------------------------- #
# ----- Parametric Analyses ----- #
# ------------------------------- #

#library(multiwayvcov)
#library(clusterSEs)

# Create won10 dummy
samp_inc$won10 <- 0
samp_inc$won10[samp_inc$rank.10==1] <- 1

# RD1a
summary(m1a <- glm(data=samp_inc[abs(samp_inc$MV)<RD1a$bw[1],], formula=ran13 ~ won10 + MV, family="binomial", subset=samp_inc$term!=3))

	(m1ases <-cluster.vcov(m1a, factor(samp_inc[which(abs(samp_inc$MV)<=RD1a$bw[1]),]$province.10[samp_inc$term!=3])))
	m1ases <- coeftest(m1a, m1ases)[,2]

summary(m1aOLS <- glm(data=samp_inc, formula=ran13 ~ won10 + MV, family="binomial", subset=samp_inc$term!=3))
	m1aOLSses <-cluster.vcov(m1aOLS, factor(samp_inc$province.10[samp_inc$term!=3]))
	m1aOLSses <- coeftest(m1aOLS, m1aOLSses)[,2]


# RD1b
summary(m1b <- lm(data=samp_inc[abs(samp_inc$MV)<RD1b$bw[1],], formula=voteShare ~ won10 + MV, subset=samp_inc$term!=3))

	(m1bses <-cluster.vcov(m1b, factor(samp_inc[which(abs(samp_inc$MV)<=RD1b$bw[1]),]$province.10[samp_inc$term!=3])))
	m1bses <- coeftest(m1b, m1bses)[,2]

summary(m1bOLS <- lm(data=samp_inc, formula=ran13 ~ won10 + MV, subset=samp_inc$term!=3))
	m1bOLSses <-cluster.vcov(m1bOLS, factor(samp_inc$province.10[samp_inc$term!=3]))
	m1bOLSses <- coeftest(m1bOLS, m1bOLSses)[,2]

# RD1b
summary(m1c <- glm(data=samp_inc[abs(samp_inc$MV)<RD1c$bw[1],], formula=win ~ won10 + MV, family="binomial", subset=samp_inc$term!=3))

	m1cses <-cluster.vcov(m1c, factor(samp_inc[which(abs(samp_inc$MV)<=RD1c$bw[1]),]$province.10[samp_inc$term!=3]))
	m1cses <- coeftest(m1c, m1cses)[,2]

summary(m1cOLS <- glm(data=samp_inc, formula=win ~ won10 + MV, family="binomial", subset=samp_inc$term!=3))	
	m1cOLSses <-cluster.vcov(m1cOLS, factor(samp_inc$province.10[samp_inc$term!=3]))
	m1cOLSses <- coeftest(m1cOLS, m1cOLSses)[,2]


stargazer(m1aOLS, m1a, m1bOLS, m1b, m1cOLS, m1c, out="tables/ols1.tex", type="latex", digits=3, label="tab:ols1", dep.var.labels = c("Re-running","Vote Share","Re-election"), covariate.labels=c("Margin of Victory, 2010", "Win, 2010", "Constant"), title="Parametric analysis of the effect of incumbency on Barangay Captain re-running, vote share and re-election. Standard errors are clustered by province. Uses rectangular kernel weights. Models 1, 3, and 5 use the entirity of the data, while 2, 4, and 6 use only points within the IK Bandwith. The coefficient of interest is `Win, 2010'.", add.lines=list(c("BW",1,round(RD1a$bw[1],3), 1 ,round(RD1b$bw[1],3), 1, round(RD1c$bw[1],3))), omit.stat=c("f","ser","ll","aic"), se=list(m1aOLSses,m1ases,m1bOLSses,m1bses,m1cOLSses,m1cses), float.env="table*", model.names=TRUE)






rm(m1aOLS, m1a, m1bOLS, m1b, m1cOLS, m1c, m1aOLSses, m1ases, m1bOLSses, m1bses, m1cOLSses, m1cses)

# ----- Add Controls ----- #
samp_inc$lnpop2010 <- log(samp_inc$pop2010)
	samp_inc$lnpop2010[samp_inc$lnpop2010=="-Inf"] <- NA
samp_inc$urbanRural <- droplevels(samp_inc$urbanRural)


# RD1a
summary(m1a <- glm(data=samp_inc[which(samp_inc$term!=3 & abs(samp_inc$MV)<RD1a$bw[1]),], formula=ran13 ~ won10 + MV + urbanRural + lnpop2010 + gender.10 + as.factor(term)))

	(m1ases <-cluster.vcov(m1a, matrix(samp_inc[which(samp_inc$term!=3 & abs(samp_inc$MV)<RD1a$bw[1] & samp_inc$term!=3),]$province.10)))
	m1ases <- coeftest(m1a, m1ases)[,2]

summary(m1aOLS <- glm(data=samp_inc[which(samp_inc$term!=3),], formula=ran13 ~ won10 + MV + urbanRural + lnpop2010 + gender.10 + as.factor(term), family="binomial"))
	(m1aOLSses <-cluster.vcov(m1aOLS, factor(samp_inc[which(samp_inc$term!=3),]$province.10)))
	m1aOLSses <- coeftest(m1aOLS, m1aOLSses)[,2]


# RD1b
summary(m1b <- lm(data=samp_inc[which(samp_inc$term!=3 & abs(samp_inc$MV)<RD1b$bw[1]),], formula = voteShare ~ won10 + MV + urbanRural + lnpop2010 + gender.10 + as.factor(term)))

	(m1bses <-cluster.vcov(m1b, as.matrix(samp_inc[which(samp_inc$term!=3 & abs(samp_inc$MV)<=RD1b$bw[1]),]$province.10)))
	m1bses <- coeftest(m1b, m1bses)[,2]

summary(m1bOLS <- lm(data=samp_inc[which(samp_inc$term!=3),], formula=ran13 ~ won10 + MV + urbanRural + lnpop2010 + gender.10 + as.factor(term)))
	(m1bOLSses <-cluster.vcov(m1bOLS, as.matrix(samp_inc[which(samp_inc$term!=3),]$province.10)))
	m1bOLSses <- coeftest(m1bOLS, m1bOLSses)[,2]

# RD1b
summary(m1c <- glm(data=samp_inc[which(samp_inc$term!=3 & abs(samp_inc$MV)<RD1c$bw[1]),], formula=win ~ won10 + MV + urbanRural + lnpop2010 + gender.10 + as.factor(term), family="binomial"))

	(m1cses <-cluster.vcov(m1c, factor(samp_inc[which(samp_inc$term!=3 & abs(samp_inc$MV)<=RD1c$bw[1]),]$province.10)))
	m1cses <- coeftest(m1c, m1cses)[,2]

summary(m1cOLS <- glm(data=samp_inc[which(samp_inc$term!=3),], formula=win ~ won10 + MV + urbanRural + lnpop2010 + gender.10 + as.factor(term), family="binomial"))	
	(m1cOLSses <-cluster.vcov(m1cOLS, factor(samp_inc[which(samp_inc$term!=3),]$province.10)))
	m1cOLSses <- coeftest(m1cOLS, m1cOLSses)[,2]

stargazer(m1aOLS, m1a, m1bOLS, m1b, m1cOLS, m1c, out="tables/ols1_wControls.tex", type="latex", digits=3, label="tab:ols1_wControls", dep.var.labels = c("Re-running","Vote Share","Re-election"), covariate.labels=c("Margin of Victory, 2010", "Win, 2010","Urban","Population (ln), 2010","Male","2nd Term","Constant"), title="Parametric analysis of the effect of incumbency on Barangay Captain re-running, vote share and re-election, including additional controls. Standard errors are clustered by province. Uses rectangular kernel weights. Models 1, 3, and 5 use the entirety of the data, while 2, 4, and 6 use only points within the IK Bandwidth. The coefficient of interest is `Win, 2010'.", add.lines=list(c("BW",1,round(RD1a$bw[1],3), 1 ,round(RD1b$bw[1],3), 1, round(RD1c$bw[1],3))), omit.stat=c("f","ser","ll","aic"), se=list(m1aOLSses,m1ases,m1bOLSses,m1bses,m1cOLSses,m1cses), float.env="table*", model.names=TRUE)


# Write csv file for use in RelativesAnalysis code
write.csv(samp_inc, file="data/samp_inc.csv", row.names=FALSE)


# ------------------------------------- #
# ----- Coefficient Plot (fig. 1) ----- #
# ------------------------------------- #

# Generate Centered, Scaled DVs in duplicate data set for coefficient plots 
samp_inc2 <- samp_inc
	samp_inc2$ran13 <- scale(samp_inc2$ran13, center=TRUE, scale=TRUE) 
	samp_inc2$voteShare <- scale(samp_inc2$voteShare, center=TRUE, scale=TRUE) 
	samp_inc2$win <- scale(samp_inc2$win, center=TRUE, scale=TRUE) 

# Use RDestimate
RD1a <- RDestimate(formula= ran13 ~ MV, cutpoint=0, subset=samp_inc2$term!=3, data=samp_inc2, cluster=as.factor(samp_inc2$province.10), model=TRUE, bw=NULL)
summary(RD1a)
plot(RD1a)

# ----------------------- DV 2: Vote Share in t+1 ------------------------- #
RD1b <- RDestimate(formula= voteShare ~ MV, cutpoint=0, subset=samp_inc2$term!=3, data=samp_inc2, cluster=as.factor(samp_inc2$province.10), model=TRUE, bw=NULL)
summary(RD1b)
plot(RD1b)


# ------------ DV 3: Probability of running and winning in t+1 ------------- #
RD1c <- RDestimate(formula= win ~ MV, cutpoint=0, subset=samp_inc2$term!=3, data=samp_inc2, cluster=as.factor(samp_inc2$province.10), model=TRUE, bw=NULL)
summary(RD1c)
plot(RD1c)


# library(ggplot2)
# library(grid)
# library(gridExtra)
# library(ggthemes)

getcoef <- function(x){x$model[[1]]$coefficients["Tr"]}
getses <- function(x){summary(x$model[[1]])$coefficients[,2][2]}
getses <- function(x){summary(x)$coefficients[1, "Std. Error"]}
ms <- list(RD1a, RD1b, RD1c)
cs <- unlist(lapply(ms, getcoef))
ses <- unlist(lapply(ms, getses))
upr <- cs + (1.96*ses)
lwr <- cs - (1.96*ses)
dat <- data.frame(cs=as.numeric(cs), upr=as.numeric(upr), lwr=as.numeric(lwr), group=as.factor(1:length(cs))) # 
# dat$group <- as.factor(dat$group)
rownames(dat) <- NULL
dat <- data.frame(dat, type=factor(x=c("Running for Office","Vote Share","Re-election"), levels=c("Running for Office","Vote Share","Re-election")))

ap <- ggplot(dat, aes(y=cs, x=type, colour=type)) + geom_hline(x=0, col="red", alpha=.5) + geom_point(stat="identity") + geom_errorbar(aes(ymax=upr, ymin=lwr), position=position_dodge(0.9), width=.5)  + theme_pander() + theme(legend.position="none")+ xlab("") + ylab("Standard Deviations") + theme(axis.text=element_text(size=20), axis.title.y = element_text(size=20), plot.title = element_text(size=20)) + scale_y_continuous(limits = c(-.2,.6)) + labs(title="Barangay Captains")

pdf("figures/rdd1Coefficients.pdf", width=8, height=5)
	ap 
dev.off()






